ARIMA

Aquifer

Petrignano

Data preparation

df <- as.data.frame(read.table('./acea-water-prediction/Aquifer_Petrignano.csv', header = TRUE, sep=','))
head(df, n=20)
##       п.їDate Rainfall_Bastia_Umbra Depth_to_Groundwater_P24
## 1  14/03/2006                    NA                   -22.48
## 2  15/03/2006                    NA                   -22.38
## 3  16/03/2006                    NA                   -22.25
## 4  17/03/2006                    NA                   -22.38
## 5  18/03/2006                    NA                   -22.60
## 6  19/03/2006                    NA                   -22.35
## 7  20/03/2006                    NA                   -22.50
## 8  21/03/2006                    NA                   -22.31
## 9  22/03/2006                    NA                   -22.31
## 10 23/03/2006                    NA                   -22.40
## 11 24/03/2006                    NA                   -22.32
## 12 25/03/2006                    NA                   -22.25
## 13 26/03/2006                    NA                   -22.15
## 14 27/03/2006                    NA                   -22.47
## 15 28/03/2006                    NA                   -22.27
## 16 29/03/2006                    NA                   -22.52
## 17 30/03/2006                    NA                   -22.50
## 18 31/03/2006                    NA                   -22.70
## 19 01/04/2006                    NA                   -22.77
## 20 02/04/2006                    NA                   -22.49
##    Depth_to_Groundwater_P25 Temperature_Bastia_Umbra Temperature_Petrignano
## 1                    -22.18                       NA                     NA
## 2                    -22.14                       NA                     NA
## 3                    -22.04                       NA                     NA
## 4                    -22.04                       NA                     NA
## 5                    -22.04                       NA                     NA
## 6                    -21.95                       NA                     NA
## 7                    -21.99                       NA                     NA
## 8                    -21.89                       NA                     NA
## 9                    -21.82                       NA                     NA
## 10                   -21.89                       NA                     NA
## 11                   -21.89                       NA                     NA
## 12                   -21.82                       NA                     NA
## 13                   -21.79                       NA                     NA
## 14                   -21.83                       NA                     NA
## 15                   -21.75                       NA                     NA
## 16                   -21.81                       NA                     NA
## 17                   -21.84                       NA                     NA
## 18                   -21.87                       NA                     NA
## 19                   -21.90                       NA                     NA
## 20                   -21.83                       NA                     NA
##    Volume_C10_Petrignano Hydrometry_Fiume_Chiascio_Petrignano
## 1                     NA                                   NA
## 2                     NA                                   NA
## 3                     NA                                   NA
## 4                     NA                                   NA
## 5                     NA                                   NA
## 6                     NA                                   NA
## 7                     NA                                   NA
## 8                     NA                                   NA
## 9                     NA                                   NA
## 10                    NA                                   NA
## 11                    NA                                   NA
## 12                    NA                                   NA
## 13                    NA                                   NA
## 14                    NA                                   NA
## 15                    NA                                   NA
## 16                    NA                                   NA
## 17                    NA                                   NA
## 18                    NA                                   NA
## 19                    NA                                   NA
## 20                    NA                                   NA

Remove NA elements from data

df <- df[complete.cases(df$Rainfall_Bastia_Umbra),]
rownames(df) <- 1:nrow(df)
head(df, n=20)
##       п.їDate Rainfall_Bastia_Umbra Depth_to_Groundwater_P24
## 1  01/01/2009                   0.0                   -31.96
## 2  02/01/2009                   0.0                   -32.03
## 3  03/01/2009                   0.0                   -31.97
## 4  04/01/2009                   0.0                   -31.91
## 5  05/01/2009                   0.0                   -31.94
## 6  06/01/2009                   0.0                   -31.89
## 7  07/01/2009                   0.0                   -31.91
## 8  08/01/2009                   0.0                   -31.83
## 9  09/01/2009                   0.9                   -31.80
## 10 10/01/2009                   0.0                   -31.76
## 11 11/01/2009                   0.0                   -31.70
## 12 12/01/2009                   0.0                   -31.57
## 13 13/01/2009                   1.1                   -31.54
## 14 14/01/2009                   0.0                   -31.44
## 15 15/01/2009                   0.0                   -31.26
## 16 16/01/2009                   0.0                   -31.50
## 17 17/01/2009                   0.0                   -31.61
## 18 18/01/2009                   0.1                   -31.15
## 19 19/01/2009                   0.1                   -30.95
## 20 20/01/2009                   0.0                   -30.93
##    Depth_to_Groundwater_P25 Temperature_Bastia_Umbra Temperature_Petrignano
## 1                    -31.14                      5.2                    4.9
## 2                    -31.11                      2.3                    2.5
## 3                    -31.07                      4.4                    3.9
## 4                    -31.05                      0.8                    0.8
## 5                    -31.01                     -1.9                   -2.1
## 6                    -31.00                     -0.7                   -0.7
## 7                    -30.96                      1.5                   -0.3
## 8                    -30.94                      4.3                    6.6
## 9                    -30.93                      4.9                    4.8
## 10                   -30.87                      1.9                    4.2
## 11                   -30.83                      3.4                    4.5
## 12                   -30.74                      3.3                    3.9
## 13                   -30.63                      6.0                    6.3
## 14                   -30.55                      8.2                    8.1
## 15                   -30.57                      7.8                    7.1
## 16                   -30.61                      5.1                    5.3
## 17                   -30.68                      1.6                    1.6
## 18                   -30.41                      6.4                    6.4
## 19                   -30.28                     10.3                   10.3
## 20                   -30.21                     12.1                   12.0
##    Volume_C10_Petrignano Hydrometry_Fiume_Chiascio_Petrignano
## 1              -24530.69                                  2.4
## 2              -28785.89                                  2.5
## 3              -25766.21                                  2.4
## 4              -27919.30                                  2.4
## 5              -29854.66                                  2.3
## 6              -29124.58                                  2.3
## 7              -31173.12                                  2.3
## 8              -30232.22                                  2.4
## 9              -30597.70                                  2.3
## 10             -31337.28                                  2.3
## 11             -29845.15                                  2.3
## 12             -28745.28                                  2.3
## 13             -28932.77                                  2.3
## 14             -28600.13                                  2.3
## 15             -24973.06                                  2.3
## 16             -31388.26                                  2.3
## 17             -30941.57                                  2.3
## 18             -23043.74                                  2.3
## 19             -21658.75                                  2.3
## 20             -23793.70                                  2.3
df <- subset(df, select = -c(Depth_to_Groundwater_P25, Temperature_Petrignano ))
colnames(df) <- c("Date", "Rainfall", "Depth_to_Groundwater","Temperature", " Volume", " Hydrometry" )
head(df, n=20)
##          Date Rainfall Depth_to_Groundwater Temperature    Volume  Hydrometry
## 1  01/01/2009      0.0               -31.96         5.2 -24530.69         2.4
## 2  02/01/2009      0.0               -32.03         2.3 -28785.89         2.5
## 3  03/01/2009      0.0               -31.97         4.4 -25766.21         2.4
## 4  04/01/2009      0.0               -31.91         0.8 -27919.30         2.4
## 5  05/01/2009      0.0               -31.94        -1.9 -29854.66         2.3
## 6  06/01/2009      0.0               -31.89        -0.7 -29124.58         2.3
## 7  07/01/2009      0.0               -31.91         1.5 -31173.12         2.3
## 8  08/01/2009      0.0               -31.83         4.3 -30232.22         2.4
## 9  09/01/2009      0.9               -31.80         4.9 -30597.70         2.3
## 10 10/01/2009      0.0               -31.76         1.9 -31337.28         2.3
## 11 11/01/2009      0.0               -31.70         3.4 -29845.15         2.3
## 12 12/01/2009      0.0               -31.57         3.3 -28745.28         2.3
## 13 13/01/2009      1.1               -31.54         6.0 -28932.77         2.3
## 14 14/01/2009      0.0               -31.44         8.2 -28600.13         2.3
## 15 15/01/2009      0.0               -31.26         7.8 -24973.06         2.3
## 16 16/01/2009      0.0               -31.50         5.1 -31388.26         2.3
## 17 17/01/2009      0.0               -31.61         1.6 -30941.57         2.3
## 18 18/01/2009      0.1               -31.15         6.4 -23043.74         2.3
## 19 19/01/2009      0.1               -30.95        10.3 -21658.75         2.3
## 20 20/01/2009      0.0               -30.93        12.1 -23793.70         2.3

Plot data

library('ggplot2')
library('forecast')
library('zoo')
library('dplyr')
library('data.table')
library('imputeTS')
library('xts')
library('tseries')
library('stats')
library('nlme')
library('fpp')
library('lubridate')
library('TSstudio')

Data preparation

# sort by date
df$Date <- as.Date(df$Date, format= "%d/%m/%Y")
df <- df[order(df$Date), ]
interval = df$Date - shift(df$Date, n=1, fill=NA, type="lag")
for(i in interval)
{
  
  if(isTRUE(i > 1) || is.null(i))
  {
    print(i)
  }
    
}
#depth <- diff(depth)
depth <- na_interpolation(df$Depth_to_Groundwater, option = "linear")

depth_ts <- ts(depth, start = df$Date[1], frequency=365)
# see that it is not statonary
adf.test(depth,alternative="stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  depth
## Dickey-Fuller = -1.8074, Lag order = 16, p-value = 0.6599
## alternative hypothesis: stationary
# box cox func
lambda <- BoxCox.lambda(depth_ts)
## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением

## Warning in optimize(guer.cv, c(lower, upper), x = x, nonseasonal.length =
## nonseasonal.length): NA/Inf заменены максимальным положительным значением
lambda
## [1] 1.999959
tmp <- BoxCox(depth_ts, lambda = lambda)
plot.ts(BoxCox(depth_ts, lambda = lambda))

# diff to get stationary
depth_ts <- diff(tmp)
# it is really stationary
adf.test(depth_ts,alternative="stationary")
## Warning in adf.test(depth_ts, alternative = "stationary"): p-value smaller than
## printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  depth_ts
## Dickey-Fuller = -11.798, Lag order = 16, p-value = 0.01
## alternative hypothesis: stationary

lambda=1.999959 plot decomposed data

ts_decompose(depth_ts)
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

look at seasonal

ts_seasonal(depth_ts, type = "all")
## Warning: No trace type specified and no positional attributes specified
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
ts_plot(depth_ts)

we can make some decisions from the seasonalty.

As we can see the minimum value of the depth is in November,December.

rainfall <- na_interpolation(df$Rainfall, option = "linear")
rainfall_ts <- ts(rainfall, start = df$Date[1], frequency=12)
rainfall_ts <- diff(rainfall_ts)
ts_seasonal(rainfall_ts, type = "all")
temperature <- na_interpolation(df$Temperature, option = "linear")
temperature_ts <- ts(temperature, start = df$Date[1], frequency=12)
temperature_ts <- diff(temperature_ts)
ts_seasonal(temperature_ts, type = "all")

From the plot we can see that the maximum temperature was in August, minimum in December

volume <- na_interpolation(df$` Volume`, option = "linear")
volume_ts <- ts(volume, start = df$Date[1], frequency=12)
volume_ts <- diff(volume_ts)
ts_seasonal(volume_ts, type = "all")

Maximum volume was in March, minimum in December

hydrometry <- na_interpolation(df$` Hydrometry`, option = "linear")
hydrometry_ts <- ts(hydrometry, start = df$Date[1], frequency=12)
hydrometry_ts <- diff(hydrometry_ts)
ts_seasonal(hydrometry_ts, type = "all")

Maximum in August, minimum in January.

The volume and hydrometry reached their minimum around the same time

FOR PREDICT NEEDS:

  1. прогнозиорвать полследнее щначение тренда
  2. сезонность прогноз
  3. остатки - арима
  4. все складываем и предсказываем

MAKE ARIMA FOR RESIDUALS

# for resisuals(noise) 
h1 <- 1500 # the length of the testing partition
h2 <- 770
decomposed_ts <- decompose(depth_ts, type=c("additive"))
USgas_split <- ts_split(decomposed_ts$random, sample.out = h1)
train <- na_interpolation(USgas_split$train, option = "linear")
test <- na_interpolation(USgas_split$test, option = "linear")

ts_info(train)
##  The train series is a ts object with 1 variable and 2698 observations
##  Frequency: 365 
##  Start time: 14245 2 
##  End time: 14252 144
ts_info(test)
##  The test series is a ts object with 1 variable and 1500 observations
##  Frequency: 365 
##  Start time: 14252 145 
##  End time: 14256 184
acf(train)

pacf(train)

arima_1 <- Arima(train, order=c(3,1,1))
arima_1
## Series: train 
## ARIMA(3,1,1) 
## 
## Coefficients:
##           ar1      ar2      ar3      ma1
##       -0.1262  -0.1635  -0.0635  -0.9380
## s.e.   0.0202   0.0199   0.0200   0.0066
## 
## sigma^2 estimated as 8.113:  log likelihood=-6649.29
## AIC=13308.59   AICc=13308.61   BIC=13338.09
tsdisplay(residuals(arima_1), lag.max = 20)

fc1 <- forecast(arima_1,h=h1)
accuracy(fc1, test)
##                       ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.05881447 2.845701 2.029092  79.55234 165.7098 0.5855924
## Test set     -0.19685650 3.113553 2.314139 101.19785 101.9243 0.6678566
##                      ACF1 Theil's U
## Training set -0.006850946        NA
## Test set     -0.049299903   1.00239
test_forecast(forecast.obj = fc1, actual = depth_ts, test = test)